# 	Filename: logdeathsMI_source.R
# 	Description: Source script to produce results based on regressions where "logdeaths" is the dependent variable using the multiply imputed data sets. Called by logdeaths_driver.R.
# 	Author: Barry Hashimoto
# 	Date: August 2019
# 	For: Barry Hashimoto, "Autocratic Consent to International Law: the Case of the International Criminal Court's Jurisdiction," International Organization.
  
############################################################################################################
# Set the number of cores for parallel computation. 
 NumberCores <- max(availableCores()-2, 1, na.rm = T)

# Prepare data and match

 # Rename certain variables so that code from an earlier version of this files can remain unmodified.
   for (z in 1:10){full$imputations[[z]]$ruleoflaw <- full$imputations[[z]]$vdem.ruleoflaw}

# Use hyperbolic sine function to save the untransformed deaths count in the data set.
   for(i in 1:10){full$imputations[[i]]$deaths <- sinh(full$imputations[[i]]$logdeaths)} 
   
# Creeat a list from the data.
data.list <- list()
	for (i in 1:10){data.list[[i]] <- with(full$imputations[[i]], data.frame(leaderexitsoffice, logdeaths, deaths, ptssum.mean, 
		RomeStatuteRatification, allunemp, maleunemp, loggrowth, logpop, logrefugees, rug, eth, rel, logoilrent, logexpmil, 
		logsoldierspercent, capital.scaled, gdp.scaled, ruleoflaw, democracy, common, mixed, islam, pre98conflict, medianIMR, 
		ccode, lcode, year, quarter, deaths.trend.single, deaths.trend.country))
		}

# A function to stop excess verbosity when running matchit().
 quiet <- function(x) { 
   sink(tempfile()) 
   on.exit(sink()) 
   invisible(force(x)) 
  }

# Define cutpoints for coarsened exact matching on continuous variables

match.list <- list()

for (i in 1:10){
	set1 <- data.list[[i]]
	breaks <- 1/2
	rug.cuts <- quantile(set1$rug, seq(0,1,breaks))
	logexpmil.cuts <- quantile(set1$logexpmil, seq(0,1,breaks))
	logsoldierspercent.cuts <- quantile(set1$logsoldierspercent, seq(0,1,breaks))
	logpop.cuts <- quantile(set1$logpop, seq(0,1,breaks))
	eth.cuts <- quantile(set1$eth, seq(0,1,breaks))
	rel.cuts <- quantile(set1$rel, seq(0,1,breaks))
	logoilrent.cuts <- quantile(set1$logoilrent, seq(0,1,breaks))
	maleunemp.cuts <- quantile(set1$maleunemp, seq(0,1,breaks))
	loggrowth.cuts <- quantile(set1$loggrowth, seq(0,1,breaks))
	gdp.cuts <- quantile(set1$gdp.scaled, seq(0,1,breaks))
	capital.cuts <- quantile(set1$capital.scaled, seq(0,1,breaks))
	rol.cuts <- quantile(set1$ruleoflaw, seq(0,1,breaks))
	pts.cuts<- quantile(set1$ptssum.mean, seq(0,1,breaks))
	imr.cuts <- quantile(set1$medianIMR, seq(0,1,breaks))
	refugees.cuts <- quantile(set1$logrefugees, seq(0,1,breaks))

	cut.list <- list(rug = rug.cuts, logexpmil = logexpmil.cuts, logsoldierspercent = logsoldierspercent.cuts, 
	logpop = logpop.cuts, eth = eth.cuts, rel = rel.cuts, logoilrent = logoilrent.cuts, maleunemp = maleunemp.cuts, 
	loggrowth = loggrowth.cuts, gdp.scaled = gdp.cuts, capital.scaled = capital.cuts, ruleoflaw = rol.cuts, 
	ptssum.mean = pts.cuts, medianIMR = imr.cuts, logrefugees = refugees.cuts)

	match.list[[i]] <- quiet(matchit(RomeStatuteRatification ~ logexpmil + logsoldierspercent + logpop + eth + rel + rug 
	+ logrefugees + logoilrent + loggrowth + maleunemp + gdp.scaled + ruleoflaw + capital.scaled + common + mixed 
	+ islam + pre98conflict + medianIMR + ptssum.mean, 
	data = data.list[[i]], method = "cem" , cutpoints = cut.list))
	}

    #summary(match.list[[i]])

####################################################################################################
# H3: antiregime violence as proxied by the logdeaths variable (approx. log battle deaths).

options(warn=-1) # Turn off warnings in OLS fits relating to the dropped reference categories in the ccode and year fixed effects.

ZeligLM <- function(x){do.call(zelig,args=list(formula=formula,data=x,model="ls"))}

# Full data, country and year fixed effects OLS, shared trend.
formula <- as.formula("formula = logdeaths ~ RomeStatuteRatification + logexpmil + logsoldierspercent + logpop + eth + rel + rug + logrefugees + logoilrent + loggrowth + maleunemp + gdp.scaled + ruleoflaw + capital.scaled +  deaths.trend.single + as.factor(ccode) + as.factor(year)")
model0 <- mclapply(data.list, ZeligLM, mc.cores = NumberCores, mc.preschedule = FALSE)

# Full data, country and year fixed effects OLS, country-specific trends. 
formula <- as.formula("formula = logdeaths ~ RomeStatuteRatification + logexpmil + logsoldierspercent + logpop + eth + rel + rug + logrefugees + logoilrent + loggrowth + maleunemp + gdp.scaled + ruleoflaw + capital.scaled +  deaths.trend.country + as.factor(ccode) + as.factor(year)")
model00 <- mclapply(data.list, ZeligLM, mc.cores = NumberCores, mc.preschedule = FALSE)

# Matched data, shared trend.
MatchDataList <- lapply(match.list, match.data)
formula <- as.formula("logdeaths ~ RomeStatuteRatification + logexpmil + logsoldierspercent + logpop + eth + rel + rug + logrefugees + logoilrent + loggrowth + maleunemp + gdp.scaled + ruleoflaw + capital.scaled + common + mixed + islam + pre98conflict + medianIMR + ptssum.mean + deaths.trend.single")
model1 <- mclapply(MatchDataList, ZeligLM, mc.cores = NumberCores, mc.preschedule = FALSE)

# Matched data, country-specific trends
formula <- as.formula("logdeaths ~ RomeStatuteRatification + logexpmil + logsoldierspercent + logpop + eth + rel + rug + logrefugees + logoilrent + loggrowth + maleunemp + gdp.scaled + ruleoflaw + capital.scaled + common + mixed + islam + pre98conflict + medianIMR + ptssum.mean + deaths.trend.country")
model11 <- mclapply(MatchDataList, ZeligLM, mc.cores = NumberCores, mc.preschedule = FALSE)

# Alternative, slower method of estimation.
if(FALSE){
# Full data, country and year fixed effects OLS, shared trend.
model0 <- list()
for (i in 1:10){model0[[i]]<- quiet(zelig(formula = logdeaths ~ RomeStatuteRatification + logexpmil + logsoldierspercent + logpop + eth + rel + rug + logrefugees + logoilrent + loggrowth + maleunemp + gdp.scaled + ruleoflaw + capital.scaled +  deaths.trend.single + as.factor(ccode) + as.factor(year), model = "ls", data = data.list[[i]], cite = F))}
# Full data, country and year fixed effects OLS, unit-specific trends. 
model00 <- list()
for (i in 1:10){model00[[i]]<- quiet(zelig(formula = logdeaths ~ RomeStatuteRatification + logexpmil + logsoldierspercent + logpop + eth + rel + rug + logrefugees + logoilrent + loggrowth + maleunemp + gdp.scaled + ruleoflaw + capital.scaled +  deaths.trend.country + as.factor(ccode) + as.factor(year), model = "ls", data = data.list[[i]], cite = F))}
# Matched data, shared trend.
model1 <- list()
for (i in 1:10){model1[[i]]<- zelig(formula = logdeaths ~ RomeStatuteRatification + logexpmil + logsoldierspercent + logpop + eth + rel + rug + logrefugees + logoilrent + loggrowth + maleunemp + gdp.scaled + ruleoflaw + capital.scaled + common + mixed + islam + pre98conflict + medianIMR + ptssum.mean + deaths.trend.single, model = "ls", data = match.data(match.list[[i]]), cite = F)}
# Matched data, country-specific trends.
model11 <- list()
for (i in 1:10){model11[[i]]<- zelig(formula = logdeaths ~ RomeStatuteRatification + logexpmil + logsoldierspercent + logpop + eth + rel + rug + logrefugees + logoilrent + loggrowth + maleunemp + gdp.scaled + ruleoflaw + capital.scaled + common + mixed + islam + pre98conflict + medianIMR + ptssum.mean + deaths.trend.country, model = "ls", data = match.data(match.list[[i]]), cite = F)}
}

# Function to get the estimates and standard errors from the model objects.
pool3 <- function(x, n){
  betas <- list()
  for(i in 1:n){betas[[i]] <- coef(x[[i]])}
  betas <- do.call(rbind, betas)
  ses <- list()
  for(i in 1:n){ses[[i]] <-  sqrt(diag(vcov(x[[i]])[[1]]))}
  ses <- do.call(rbind, ses)  
  pooled <- mi.meld(betas, ses, byrow = T)	
  pooled <- do.call(rbind, pooled)
  pooled <- t(pooled)
  pooled <- cbind(pooled, dnorm(pooled[,1]/pooled[,2]))
  colnames(pooled) <- c("Est", "SE", "p")
  return(pooled)
	}
		
# A function to get the adjusted R squared from Zelig least squares with imputed data
 get.r <- function(x, n){
    vec <- vector()
    for(k in 1:n){
	new <- summary(from_zelig_model(x[[k]]))$adj.r.squared	
	vec <- c(vec, new)
	}	
	return(c(mean(vec), NA, NA))
  }

# A function to calculate N from Zelig least squares.
 get.n <- function(x,n){
  vec <- vector()
  for(i in 1:n){vec[i] <- length(summary(from_zelig_model(x[[i]]))$residuals)}
  return(c(mean(vec), sd(vec), NA))
  }

# A function to get model output stats for the fixed effects models
 output <- function(x,n){
	mat <- rbind(pool3(x,n)[2,], get.r(x,n), get.n(x,n))
	#mat <- signif(mat, 4)             
	rownames(mat) <- c("Coef. of ICC Jurisdiction", "Adj. R-squared", "N")
	return(mat)
	}

# Print model output using the pool3 function.
  #pool3(model0, 10)
  #pool3(model00, 10)
  #pool3(model1, 10)
  #pool3(model11, 10)

# Create a table of model results
  col1 <- list(output(model0,10), output(model1,10))
  col1 <- do.call(rbind, col1)
  col2 <- list(output(model00,10), output(model11,10))
  col2 <- do.call(rbind, col2)

  deaths.table <- cbind(col1, col2)
  
  writeLines("")
  writeLines("# Print models of logdeaths WITH control variables and with country and year fixed effects for the selected regime type using the multiply imputed data. First row prints models with fixed effects and control variables. Second row prints results for models using the matched data and control variables. First column features models with a shared trend, whereas the second column features models with country-specific trends.")
  print(round(deaths.table, 3))
  
  #require(memisc)
  #print(toLatex(deaths.table))
  
  rm(model0, model00, model1, model11)

############################################################################################################
# Regression results without any control variables.

  formula <- as.formula("logdeaths ~ RomeStatuteRatification + deaths.trend.single + as.factor(ccode) + as.factor(year)")
  NC.SINGLE <- mclapply(data.list, ZeligLM, mc.cores = NumberCores, mc.preschedule = FALSE)

  formula <- as.formula("logdeaths ~ RomeStatuteRatification + deaths.trend.country + as.factor(ccode) + as.factor(year)")
  NC.COUNTRY <- mclapply(data.list, ZeligLM, mc.cores = NumberCores, mc.preschedule = FALSE)

# Alternative slower method of estimating the above.
if(FALSE){
  NC.SINGLE <- list()
  for (i in 1:10){NC.SINGLE[[i]]<- quiet(zelig(formula = logdeaths ~ RomeStatuteRatification + deaths.trend.single + as.factor(ccode) + as.factor(year), 
	model = "ls", data = data.list[[i]], cite = F))}
  NC.COUNTRY <- list()
  for (i in 1:10){NC.COUNTRY[[i]]<- quiet(zelig(formula = logdeaths ~ RomeStatuteRatification +  deaths.trend.country + as.factor(ccode) + as.factor(year), 
	model = "ls", data = data.list[[i]], cite = F))}
}
	
  col1 <- list(round(output(NC.SINGLE,10), 3), round(output(NC.COUNTRY,10), 3))
  
  writeLines("")
  writeLines("# Print models of logdeaths WITHOUT control variables but with country and year fixed effects for the selected regime type using the multiply imputed data. The first list entry prints the results for models with a shared trend. The second list entry prints the results for models with country-specific trends.")
  print(col1)
  
  rm(NC.SINGLE, NC.COUNTRY)
  
############################################################################################################
# Table of balance statistics for the supplementary appendix.
######################################################################################################

# Stats for the matched data alone, now
balance.mat.treated <- NULL
for(i in 1:length(match.list)){
balance.mat.treated <- rbind(balance.mat.treated, t(summary(match.list[[i]])$sum.matched[,1]))
}
treated.balance <- cbind(apply(balance.mat.treated,2,mean))
treated.balance <- round(treated.balance, 2)

balance.mat.control <- NULL
for(i in 1:length(match.list)){
balance.mat.control <- rbind(balance.mat.control, t(summary(match.list[[i]])$sum.matched[,2]))
}
control.balance <- cbind(apply(balance.mat.control,2,mean))
control.balance <- round(control.balance, 2)

rownames(treated.balance) <- rownames(control.balance) <- rownames(summary(match.list[[i]])$sum.matched[,1:2])
colnames(treated.balance) <- colnames(control.balance) <- c("Mean")

n.mat <- NULL
for(i in 1:length(match.list)){
	n.mat <- rbind(n.mat, summary(match.list[[i]])$nn[2,])
	}
	
N <- c(apply(n.mat,2,mean)[1], apply(n.mat,2,mean)[2])

# Statistics for control observations are in the left two columns, followed by statistics for treated observations.

matched.data.stats <- rbind(cbind(control.balance, treated.balance), N)

# Stats for all data, now

balance.mat.treated <- NULL
for(i in 1:length(match.list)){
	balance.mat.treated <- rbind(balance.mat.treated, t(summary(match.list[[i]])$sum.all[,1]))
	}

treated.balance <- cbind(apply(balance.mat.treated,2,mean))
treated.balance <- round(treated.balance, 2)

balance.mat.control <- NULL
for(i in 1:length(match.list)){
	balance.mat.control <- rbind(balance.mat.control, t(summary(match.list[[i]])$sum.all[,2]))
	}
	
control.balance <- cbind(apply(balance.mat.control,2,mean))
control.balance <- round(control.balance, 2)

rownames(treated.balance) <- rownames(control.balance) <- rownames(summary(match.list[[i]])$sum.all[,1:2])
colnames(treated.balance) <- colnames(control.balance) <- c("Mean")

n.mat <- NULL
for(i in 1:length(match.list)){
	n.mat <- rbind(n.mat, summary(match.list[[i]])$nn[1,])
	}
	
N <- c(apply(n.mat,2,mean)[1], 	apply(n.mat,2,mean)[2])

# Statistics for control observations are in the left two columns, followed by statistics for treated observations.

all.data.stats <- rbind(cbind(control.balance, treated.balance), N)

# Make table with the stats for matched and all data
  balance.stats.table <- cbind(matched.data.stats, all.data.stats)
  
  writeLines("")
  writeLines("# Print table of balance statistics for the selected regime type. Left pair of columns shows stats for matched data, control then treated groups. Right pair of columns shows stats for all data, control then treated groups. ")
  print(balance.stats.table)
  
  #toLatex(balance.stats.table)

############################################################################################################
# Simulation of SATT statistics for regressions where logdeaths is the dependant variable, reported in the text of the manuscript.
# Zelig ATT simulation. Simulate, pool, calculate. 

 unloadNamespace("arm") # This overrides sim.
 suppressPackageStartupMessages(library(dplyr))

 MatchDataList <- lapply(match.list, match.data)

# Set formula for models: Variable logexpmil is omitted b/c it has a 0 mean in control and treated groups.
 formula <- as.formula("formula = logdeaths ~ RomeStatuteRatification + logsoldierspercent + logpop + eth + rel + rug + logrefugees + logoilrent + loggrowth + maleunemp + gdp.scaled + ruleoflaw + capital.scaled + common + mixed + islam + pre98conflict + medianIMR + ptssum.mean + deaths.trend.country")

 GetSATT <- function(x){
    do.call(zelig, args = list(formula = formula, model = "ls", data = x, cite = F)) %>%
    ATT(treatment = "RomeStatuteRatification", treated = 1, num = 20000) %>%
    get_qi(qi="ATT", xvalue = "TE")
	}
	
 att.pooled <- mclapply(MatchDataList, GetSATT, mc.cores=NumberCores, mc.set.seed= 100, mc.preschedule=F) %>% unlist
    
  writeLines("")
  writeLines("# Print 95% interval plus 25th and 75th percentiles (top) and median absolute deviation (bottom) of the SATT of RomeStatuteRatification on logdeaths for states of the selected regime type.")
  print(round(quantile(att.pooled,c(.025,.25,.5,.75,.975)), digits = 3)) # 95% interval
  print(round(mad(att.pooled), digits = 4)) # median absolute deviation.

  
# Calculate the SATT as percent reduction in battle deaths on the linear scale. Parameter "n" is the counterfactual count of deaths in the absence of ICC jurisdiction.
  unscaleit <- function(n){
	ihst <- function(x){log(x+sqrt(x^2+1))} # Inv. hyperbolic sine function
	raw.satt <- sinh(ihst(n) + att.pooled) - n # Calculate SATT on the scale of the raw deaths count.
	raw.satt.pct <- 100*raw.satt/n
	satt.raw.quantiles <- quantile(raw.satt.pct, c(0.025, 0.25, 0.5, 0.75, 0.975))
	print(signif(satt.raw.quantiles, 3))
	print(signif(mad(raw.satt.pct),3))
	}
	
   writeLines("")	
   writeLines("# Print the SATT as the percent reduction in battle deaths on the linear scale. Print median absolute deviation (bottom). Show the 2.5th, 25th, 50th, 75th, and 97.5th percentiles as well as the median absolute deviation of this statistic.")
   
   writeLines("")	
   writeLines("# Counterfactual baseline = 5 deaths.  Median absolute deviation printed below.")	
	print(unscaleit(5))
	
   writeLines("")	
   writeLines("# Counterfactual baseline = 80 deaths.  Median absolute deviation printed below.")	
	print(unscaleit(80))
	
   writeLines("")	
   writeLines("# Counterfactual baselines of 100 and 500 deaths. Median absolute deviation printed below.")	
	print(unscaleit(100))
	print(unscaleit(500))

   writeLines("")
   writeLines("# What was the mean and standard deviation of deaths (linear scale, not log scale) in the period for the selected regime type?")
	avdeaths <- data.list[[1]]$deaths
	print(round(mean(avdeaths), 2))
	print(round(sd(avdeaths), 2))

############################################################################################################# 
# Inspect matched observations.
  
  df <- data.frame(match.list[[1]]$model$data, match.list[[1]]$subclass, match.list[[1]]$weights) %>%
  			rename(weight=match.list..1...weights, subclass = match.list..1...subclass) %>% 
  			subset(subset=weight>0, select=c("quarter", "lcode", "year","subclass", "weight")) %>%
  			setorder("subclass", "weight") 			
  mf <- subset(full$imputations[[1]], select = c("quarter","lcode", "country", "leader","RomeStatuteRatification"))
  pairs <- join(df, mf, by = c("quarter", "lcode"), type = "left")
  pairs <- pairs %>% select(subclass, weight, year, RomeStatuteRatification, country, leader,  quarter, lcode)  
  
  # See some matches.
  pairs[which(pairs$subclass==333),]
  pairs[which(pairs$subclass==648),]
  pairs[which(pairs$subclass==149),]
  pairs[which(pairs$subclass==83),]
  
options(warn=2, error=recover)  # Reset warnings option to default.


############################################################################################################# 
# End


